OHI British Columbia | OHI Science | Citation policy
knitr::opts_chunk$set(fig.width = 6, fig.height = 4, fig.path = 'Figs/',
echo = TRUE, message = FALSE, warning = FALSE)
library(raster)
library(data.table)
source('~/github/src/R/common.R') ###
### includes library(tidyverse); library(stringr); dir_M points to ohi directory
dir_git <- '~/github/spp_health_dists'
dir_am_data <- file.path(dir_M, 'git-annex/globalprep/_raw_data/aquamaps/d2017')
### goal specific folders and info
dir_setup <- file.path(dir_git, 'data_setup')
dir_o_anx <- file.path(dir_O, 'git-annex/spp_health_dists')
# ### provenance tracking
library(provRmd); prov_setup()
### support scripts
source('~/github/src/R/rast_tools.R')
### raster plotting and analyzing scriptsCreate a map of the distribution of current species health - all species in AquaMaps assessed by IUCN (using AquaMaps version of IUCN ID numbers)
Here we will use a probability threshold of 0.60 to determine species presence. We filter out any species with NA for iucn_sid or cat_score.
cell_risk_file <- file.path(dir_git, 'data', 'current_risk_by_loiczid.csv')
if(!file.exists(cell_risk_file)) {
iucn_to_am_lookup <- fread(file.path(dir_setup, 'int', 'am_iucn_crosslisted_spp.csv')) %>%
select(am_sid = speciesid, iucn_sid = iucn_id) %>%
distinct()
spp_current_risk <- fread(file.path(dir_git, 'data', 'iucn_spp_cat_current.csv')) %>%
select(iucn_sid, cat, cat_score) %>%
left_join(iucn_to_am_lookup, by = 'iucn_sid') %>%
as.data.table()
spp_cells <- fread(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'))
git_prov(file.path(dir_setup, 'int', 'am_iucn_crosslisted_spp.csv'), filetype = 'input')
git_prov(file.path(dir_git, 'data', 'iucn_spp_cat_current.csv'), filetype = 'input')
git_prov(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'), filetype = 'input')
### using data.table join:
spp_cells_risk <- spp_current_risk[spp_cells, on = 'am_sid']
spp_cells_risk_sum <- spp_cells_risk %>%
filter(!is.na(cat_score) & !is.na(iucn_sid)) %>%
filter(prob >= 0.60) %>%
group_by(loiczid) %>%
summarize(n_spp = n(), mean_risk = mean(cat_score))
write_csv(spp_cells_risk_sum, cell_risk_file)
} else {
git_prov(file.path(dir_setup, 'int', 'am_iucn_crosslisted_spp.csv'), filetype = 'input')
git_prov(file.path(dir_git, 'data', 'iucn_spp_cat_current.csv'), filetype = 'input')
git_prov(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'), filetype = 'input')
git_prov(cell_risk_file, filetype = 'output')
}loiczid_rast <- raster(file.path(dir_git, 'data/loiczid_raster.tif'))
### gotta force the mean_risk column to be double; there are lots of zero
### values so will default to thinking that column is integer.
risk_by_cell <- read_csv(file.path(dir_git, 'data', 'current_risk_by_loiczid.csv'),
col_types = 'iid')
risk_rast <- subs(loiczid_rast, risk_by_cell, by = 'loiczid', which = 'mean_risk')
nspp_rast <- subs(loiczid_rast, risk_by_cell, by = 'loiczid', which = 'n_spp')
plot(risk_rast)plot(nspp_rast)risk_clipped <- risk_rast
values(risk_clipped)[values(nspp_rast) < 5] <- NA
plot(risk_clipped)Again we will use a probability threshold of 0.60 to determine species presence. We filter out any species with NA for iucn_sid or trend_score.
cell_trend_file <- file.path(dir_git, 'data', 'spp_trend_by_loiczid.csv')
if(!file.exists(cell_trend_file)) {
iucn_to_am_lookup <- fread(file.path(dir_setup, 'int', 'am_iucn_crosslisted_spp.csv')) %>%
select(am_sid = speciesid, iucn_sid = iucn_id) %>%
distinct()
spp_trend <- fread(file.path(dir_git, 'data', 'trend_by_spp.csv')) %>%
left_join(iucn_to_am_lookup, by = 'iucn_sid') %>%
as.data.table()
spp_cells <- fread(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'))
git_prov(file.path(dir_setup, 'int', 'am_iucn_crosslisted_spp.csv'), filetype = 'input')
git_prov(file.path(dir_git, 'data', 'trend_by_spp.csv'), filetype = 'input')
git_prov(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'), filetype = 'input')
### using data.table join:
spp_cells_trend <- spp_trend[spp_cells, on = 'am_sid']
spp_cells_trend_sum <- spp_cells_trend %>%
filter(!is.na(trend_score) & !is.na(iucn_sid)) %>%
filter(prob >= 0.60) %>%
group_by(loiczid) %>%
summarize(n_spp = n(), mean_trend = mean(trend_score))
write_csv(spp_cells_trend_sum, cell_trend_file)
} else {
git_prov(file.path(dir_setup, 'int', 'am_iucn_crosslisted_spp.csv'), filetype = 'input')
git_prov(file.path(dir_git, 'data', 'trend_by_spp.csv'), filetype = 'input')
git_prov(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'), filetype = 'input')
git_prov(cell_trend_file, filetype = 'output')
}loiczid_rast <- raster(file.path(dir_git, 'data/loiczid_raster.tif'))
### gotta force the mean_trend column to be double; there are lots of zero
### values so will default to thinking that column is integer.
trend_by_cell <- read_csv(file.path(dir_git, 'data', 'spp_trend_by_loiczid.csv'),
col_types = 'iid')
trend_rast <- subs(loiczid_rast, trend_by_cell, by = 'loiczid', which = 'mean_trend')
nspp_tr_rast <- subs(loiczid_rast, trend_by_cell, by = 'loiczid', which = 'n_spp')
plot(trend_rast)plot(nspp_tr_rast)trend_clipped <- trend_rast
values(trend_clipped)[values(nspp_tr_rast) < 5] <- NA
plot(trend_clipped)Here we will use a probability threshold of 0.60 to determine species presence. We filter out any species with NA for iucn_sid or cat_score.
Using the time-series category scores, cast forward and back to fill out all year time series; then select to specific years and save.
iucn_to_am_lookup <- fread(file.path(dir_setup, 'int', 'am_iucn_crosslisted_spp.csv')) %>%
select(am_sid = speciesid, iucn_sid = iucn_id) %>%
distinct()
spp_ts_risk <- fread(file.path(dir_git, 'data', 'iucn_spp_cat_timeseries.csv')) %>%
select(iucn_sid, year, cat_ts_score) %>%
group_by(iucn_sid) %>%
mutate(n_assess = n()) %>%
complete(year = 1965:2017) %>%
fill(cat_ts_score, .direction = 'down') %>%
ungroup() %>%
filter(!is.na(iucn_sid) & !is.na(cat_ts_score)) %>%
left_join(iucn_to_am_lookup, by = 'iucn_sid') %>%
as.data.table()
git_prov(file.path(dir_setup, 'int', 'am_iucn_crosslisted_spp.csv'), filetype = 'input')
git_prov(file.path(dir_git, 'data', 'iucn_spp_cat_timeseries.csv'), filetype = 'input')
spp_assessed <- spp_ts_risk %>%
group_by(year) %>%
mutate(n_spp = n()) %>%
ungroup()
ggplot(spp_assessed, aes(x = year, y = n_spp)) +
ggtheme_plot() +
geom_line() +
labs(title = 'Number of assessed species over time',
x = 'year', y = 'total assessed species')spp_cells <- fread(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv')) %>%
filter(prob >= .60)##
Read 0.0% of 15023644 rows
Read 13.5% of 15023644 rows
Read 26.0% of 15023644 rows
Read 39.7% of 15023644 rows
Read 53.7% of 15023644 rows
Read 67.6% of 15023644 rows
Read 81.6% of 15023644 rows
Read 93.9% of 15023644 rows
Read 15023644 rows and 3 (of 3) columns from 0.312 GB file in 00:00:12
git_prov(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'), filetype = 'input')
### Load raster and substitute mean risks
loiczid_rast <- raster(file.path(dir_git, 'data/loiczid_raster.tif'))
for(yr in c(1990, 1995, 2000, 2005, 2010:2017)) { ### yr <- 1990
risk_df <- spp_ts_risk %>%
filter(year == yr) %>%
left_join(spp_cells, by = 'am_sid') %>%
group_by(loiczid) %>%
summarize(mean_risk = mean(cat_ts_score),
n_spp = n())
risk_rast <- subs(loiczid_rast, risk_df, by = 'loiczid', which = 'mean_risk')
nspp_rast <- subs(loiczid_rast, risk_df, by = 'loiczid', which = 'n_spp')
plot(risk_rast, main = paste0('Mean risk, ', yr))
plot(nspp_rast, main = paste0('# assessed spp, ', yr))
risk_clipped <- risk_rast
values(risk_clipped)[values(nspp_rast) < 5] <- NA
plot(risk_clipped, main = paste0('Mean risk, n_spp > 5, ', yr))
}prov_wrapup(commit_outputs = FALSE)